import scrapy
import urllib.parse as up
import ktool
import pandas as  pd
import re
import json
# Seed URL for the crawl: entry page of the runoob HTML tutorial.
url_level1 = 'https://www.runoob.com/html/html-tutorial.html'
class CainiaoSpider(scrapy.Spider):
    """Scrape the left-hand navigation (#leftcolumn) of the runoob HTML
    tutorial and save the (title, href) pairs to an Excel file.

    One dict item per link is also yielded into the Scrapy pipeline.
    """

    name = 'cainiao'

    def start_requests(self):
        # Crawl entry point: Scrapy calls this first to seed the queue.
        yield scrapy.Request(
            url=url_level1,
            callback=self.parse_start,
        )

    def parse_start(self, response):
        """Extract sidebar link titles and hrefs, write them to
        'nodeandhref1.xlsx', and yield one ``{'标题', 'href'}`` dict per link.

        Yields dicts rather than the DataFrame itself: a Scrapy callback
        may only yield Requests or item-like objects, so the original
        ``yield pds`` (a DataFrame) would break the item pipeline.
        """
        content = response.body.decode('utf8', 'ignore')
        titles = [t.strip() for t in ktool.xpath.xpath_all(
            content, '//div[@id="leftcolumn"]/a/text()')]
        hrefs = list(ktool.xpath.xpath_all(
            content, '//div[@id="leftcolumn"]/a/@href'))

        table = pd.DataFrame({'标题': titles, 'href': hrefs})
        # Context manager replaces ExcelWriter.save(), which was
        # deprecated and then removed in pandas 2.0; it also closes the
        # file handle even if to_excel raises.
        with pd.ExcelWriter('nodeandhref1.xlsx') as writer:
            table.to_excel(writer, sheet_name='节点名称和链接对照表')

        for title, href in zip(titles, hrefs):
            yield {'标题': title, 'href': href}
